# ===============================================================#
#                     Replication files for:                     #
#.  "Attitudinal and Behavioral Legacies of Wartime Violence:    #
#                      A Meta-Analysis"                          #
#                        Joan Barceló                            #
#               American Political Science Review                #
#               Last update: September 3, 2025                   #
# ===============================================================#

###############################################
# Appendix I/Tables I1, I2, I3: Robustness to excluding outliers
###############################################

## ------------------------ Load Packages ------------------------

library(dplyr)
library(tidyr)
library(metafor)
library(knitr)
library(kableExtra)

# ---------- Paths and file maps ----------

BASE <- "~/Datasets/"

# Outcome -> filename maps

files_full <- c(
  "Social groups participation" = "meta.groups.csv",
  "Voting"                      = "meta.voting.csv",
  "Generalized trust"           = "meta.generalized_trust.csv",
  "Political participation"     = "meta.political_participation.csv",
  "Interest / knowledge"        = "meta.political_interest.csv",
  "Normative prosociality"      = "meta.normative.csv",
  "Community leadership"        = "meta.leadership.csv",
  "Altruism"                    = "meta.altruism.csv",
  
  "Attitudes toward wartime enemies" = "meta.warenemies.csv",
  "Threat perceptions"               = "meta.threat.csv",
  "Intergroup attitudes"             = "meta.intergroup.csv",
  
  "Group voting"         = "meta.groupvoting.csv",
  "Group identification" = "meta.groupid.csv",
  "Ingroup trust"        = "meta.ingroup_trust.csv",
  
  "Authoritarian attitudes"      = "meta.authoritarian.csv",
  "Hawkish security preferences" = "meta.hawkish.csv",
  "Support for punitive justice" = "meta.punitive.csv",
  "Extreme ideology"             = "meta.xtrideology.csv",
  "Social intolerance"           = "meta.social_intolerance.csv",
  "Antagonism toward peace"      = "meta.antipeace.csv",
  "Institutional distrust"       = "meta.instmistrust.csv",
  "Political intolerance"        = "meta.political_intolerance.csv"
)

# Derive Bivariate and Quasi-experimental maps
files_biv <- sub("\\.csv$", "_biv.csv",   files_full)
files_qe  <- sub("\\.csv$", "_random.csv", files_full)

# Some QE outcomes do not exist in your data repo
# They will be handled gracefully if the file is missing

# Row order in tables (match screenshots)
row_order <- c(
  "Social groups participation","Voting","Generalized trust","Political participation",
  "Interest / knowledge","Normative prosociality","Community leadership","Altruism",
  "Attitudes toward wartime enemies","Threat perceptions","Intergroup attitudes",
  "Group voting","Group identification","Ingroup trust",
  "Authoritarian attitudes","Hawkish security preferences","Support for punitive justice",
  "Extreme ideology","Social intolerance","Antagonism toward peace","Institutional distrust",
  "Political intolerance"
)

# Country abbreviation mapping used in the “Excluding Most Common Country” column
abbr <- c(
  "Afghanistan"="AF","Austria"="AT","Bosnia"="BA","Colombia"="CL","Finland"="FI",
  "Israel"="IS","India"="IN","Lebanon"="LB","Northern Ireland"="NI","Spain"="SP",
  "Sri Lanka"="SL","Tajikistan"="TJ","Turkey"="TK","Uganda"="UG","United Kingdom"="UK",
  "Ukraine"="UA"
)

abbrize <- function(v) {
  x <- trimws(as.character(v))
  labs <- ifelse(x %in% names(abbr), abbr[x], toupper(substr(x, 1, 2)))
  paste(unique(labs[!is.na(labs) & nzchar(labs)]), collapse = ", ")
}

pfmt <- function(p) {
  if (is.na(p)) return("")
  if (p < .001) "< .001" else sprintf("%.3f", p)
}
num <- function(x) ifelse(is.na(x), "", sprintf("%.2f", x))

safe_read <- function(file) {
  f <- file.path(BASE, file)
  if (!file.exists(f)) return(NULL)
  tryCatch(read.csv(f, sep = ",", stringsAsFactors = FALSE), error = function(e) NULL)
}

# Build named lists of data frames (NULL if missing)
datasets_full <- lapply(files_full, safe_read)
datasets_biv  <- lapply(files_biv,  safe_read)
datasets_qe   <- lapply(files_qe,   safe_read)

# ---------------- 1) Core estimator used in all tables ----------------
# Given a data frame, compute the three robustness columns:
# A) Excluding top 10 percent absolute yi
# B) Excluding top 10 percent highest variance
# C) Excluding most common country (ties allowed)
# Returns a 1-row data frame with formatted cells for each column

compute_three_columns <- function(df, outcome_label) {
  # If data missing, return NA row
  if (is.null(df)) {
    return(data.frame(
      Outcome = outcome_label,
      k_ext = NA_integer_, br_ext = NA_real_, lb_ext = NA_real_, ub_ext = NA_real_, p_ext = NA_real_,
      k_var = NA_integer_, br_var = NA_real_, lb_var = NA_real_, ub_var = NA_real_, p_var = NA_real_,
      country_drop = NA_character_, k_cty = NA_integer_,
      br_cty = NA_real_, lb_cty = NA_real_, ub_cty = NA_real_, p_cty = NA_real_,
      stringsAsFactors = FALSE
    ))
  }
  
  # Coerce and clean
  df$yi <- suppressWarnings(as.numeric(df$yi))
  df$vi <- suppressWarnings(as.numeric(df$vi))
  df$authoryear <- as.factor(df$authoryear)
  df$country <- as.character(df$country)
  df <- df[is.finite(df$yi) & is.finite(df$vi) & df$vi > 0, , drop = FALSE]
  if (!nrow(df)) {
    return(compute_three_columns(NULL, outcome_label))
  }
  df$effect_id <- seq_len(nrow(df))
  
  # Helper that runs rma.mv and returns est, se, ci, p
  run_re <- function(dat) {
    if (nrow(dat) <= 1 || length(unique(dat$authoryear)) <= 1) {
      return(c(NA_real_, NA_real_, NA_real_, NA_real_))
    }
    fit <- tryCatch(rma.mv(yi = yi, V = vi, random = ~1 | authoryear/effect_id,
                           data = dat, method = "REML"),
                    error = function(e) NULL)
    if (is.null(fit)) return(c(NA_real_, NA_real_, NA_real_, NA_real_))
    c(as.numeric(fit$b[1]), as.numeric(fit$ci.lb[1]),
      as.numeric(fit$ci.ub[1]), as.numeric(fit$pval[1]))
  }
  
  # A) Excluding extreme effects (top 10 percent by |yi|)
  cutoff_abs <- as.numeric(quantile(abs(df$yi), 0.90, na.rm = TRUE))
  df_ext <- df[abs(df$yi) <= cutoff_abs, , drop = FALSE]
  k_ext <- nrow(df_ext)
  re_ext <- run_re(df_ext)  # br, lb, ub, p
  
  # B) Excluding high-variance estimates (top 10 percent by vi)
  cutoff_vi <- as.numeric(quantile(df$vi, 0.90, na.rm = TRUE))
  df_var <- df[df$vi <= cutoff_vi, , drop = FALSE]
  k_var <- nrow(df_var)
  re_var <- run_re(df_var)
  
  # C) Excluding most common country (ties possible)
  df_cty <- df
  if ("country" %in% names(df) && any(nzchar(df$country))) {
    ct_counts <- sort(table(df$country), decreasing = TRUE)
    top_cty <- names(ct_counts)[ct_counts == max(ct_counts)]
    removed_label <- abbrize(top_cty)
    df_cty <- df[!(df$country %in% top_cty), , drop = FALSE]
  } else {
    removed_label <- NA_character_
    df_cty <- df[0, , drop = FALSE]
  }
  k_cty <- nrow(df_cty)
  re_cty <- run_re(df_cty)
  
  data.frame(
    Outcome = outcome_label,
    k_ext = k_ext, br_ext = re_ext[1], lb_ext = re_ext[2], ub_ext = re_ext[3], p_ext = re_ext[4],
    k_var = k_var, br_var = re_var[1], lb_var = re_var[2], ub_var = re_var[3], p_var = re_var[4],
    country_drop = removed_label, k_cty = k_cty,
    br_cty = re_cty[1], lb_cty = re_cty[2], ub_cty = re_cty[3], p_cty = re_cty[4],
    stringsAsFactors = FALSE
  )
}

# ---------------- 2) Run for each model set ----------------
# FULL
res_full <- do.call(rbind, lapply(names(datasets_full), function(nm) {
  compute_three_columns(datasets_full[[nm]], nm)
}))

# BIVARIATE
res_biv <- do.call(rbind, lapply(names(datasets_biv), function(nm) {
  compute_three_columns(datasets_biv[[nm]], nm)
}))

# QUASI-EXPERIMENTAL
res_qe <- do.call(rbind, lapply(names(datasets_qe), function(nm) {
  compute_three_columns(datasets_qe[[nm]], nm)
}))

# Vectorized helpers
pfmt <- function(p) ifelse(is.na(p), "",
                           ifelse(p < 0.001, "< .001", sprintf("%.3f", p)))
num  <- function(x) ifelse(is.na(x), "", sprintf("%.2f", x))

# Ensure row order and keep only outcomes in row_order
order_and_format <- function(df, keep_country = TRUE) {
  df <- df %>%
    filter(Outcome %in% row_order) %>%
    mutate(Outcome = factor(Outcome, levels = row_order)) %>%
    arrange(Outcome) %>%
    mutate(Outcome = as.character(Outcome))
  
  # Build formatted columns for each of the three panels
  panel_A <- df %>%
    transmute(
      Outcome,
      k_A = k_ext,
      br_A = num(br_ext),
      ci_A = ifelse(is.na(lb_ext), "",
                    paste0("[", num(lb_ext), ", ", num(ub_ext), "]")),
      p_A  = pfmt(p_ext)
    )
  panel_B <- df %>%
    transmute(
      Outcome,
      k_B = k_var,
      br_B = num(br_var),
      ci_B = ifelse(is.na(lb_var), "",
                    paste0("[", num(lb_var), ", ", num(ub_var), "]")),
      p_B  = pfmt(p_var)
    )
  panel_C <- df %>%
    transmute(
      Outcome,
      Country = if (keep_country) ifelse(is.na(country_drop), "", country_drop) else "",
      k_C = k_cty,
      br_C = num(br_cty),
      ci_C = ifelse(is.na(lb_cty), "",
                    paste0("[", num(lb_cty), ", ", num(ub_cty), "]")),
      p_C  = pfmt(p_cty)
    )
  
  panel_A %>%
    left_join(panel_B, by = "Outcome") %>%
    left_join(panel_C, by = "Outcome")
}

#Table I.1
tab_full <- order_and_format(res_full)
print(tab_full)

#Table I.2
tab_biv  <- order_and_format(res_biv)
print(tab_biv)

#Table I.3
tab_qe   <- order_and_format(res_qe)
print(tab_qe)
